#!/usr/bin/env python3

'''
Run Braker

* Braker parameters used in FunGAP
braker.pl \
  --fungus \
  --softmasking \
  --cores=<NUM_CORES> \
  --genome=<GENOME_ASSEMBLY> \
  --bam=<BAM_FILE> \
  --species=<LIB_NAME> \
  --translation_table=<INT> \
  --gff3

--fungus: GeneMark-ET option: run algorithm with branch point model (most
    useful for fungal genomes). It is passed to braker.pl only when this
    script itself is called with --fungus.

Input: BAM file (Hisat-generated), masked assembly
Output: gene features in GFF3
Last updated: Jul 13, 2020
'''

import os
import shlex
import shutil
import sys
from argparse import ArgumentParser

from import_config import import_config
from set_logging import set_logging

# Configuration loaded once at import time. run_braker() reads the entries
# 'BRAKER_PATH', 'AUGUSTUS_PATH', 'BAMTOOLS_PATH', 'GENEMARK_PATH' and
# 'SAMTOOLS_PATH' from it.
D_CONF = import_config()


def main():
    '''Parse command-line options, prepare directories and start Braker'''
    parser = ArgumentParser(
        usage='run_braker.py -m <masked_assembly> -b <bam_files>'
    )
    # (option strings, keyword settings) in the order they are registered
    option_specs = (
        (('-m', '--masked_assembly'), dict(
            nargs=1, required=True,
            help='Repeat-masked genome assembly in FASTA format'
        )),
        (('-b', '--bam_files'), dict(
            nargs='+', required=True,
            help='BAM files generated by Hisat2'
        )),
        (('-o', '--output_dir'), dict(
            nargs='?', default='braker_out',
            help='Output directory'
        )),
        (('-l', '--log_dir'), dict(
            nargs='?', default='logs',
            help='Log directory'
        )),
        (('-c', '--num_cores'), dict(
            nargs='?', default=1, type=int,
            help='Number of cores to be used'
        )),
        (('-t', '--translation_table'), dict(
            nargs='?', default=1, type=int,
            help='Translation table (default: 1)'
        )),
        (('--fungus',), dict(
            action='store_true',
            help='--fungus flag for BRAKER'
        )),
    )
    for option_strings, settings in option_specs:
        parser.add_argument(*option_strings, **settings)

    opts = parser.parse_args()

    # Work with absolute paths from here on
    assembly_path = os.path.abspath(opts.masked_assembly[0])
    bam_paths = list(map(os.path.abspath, opts.bam_files))
    out_path = os.path.abspath(opts.output_dir)
    log_path = os.path.abspath(opts.log_dir)
    # Empty string when the flag is not requested, so it vanishes from the
    # command line built later
    fungus_flag = '--fungus' if opts.fungus else ''

    # Directories must exist before the log file can be opened
    create_dir(out_path, log_path)
    logger = set_logging(os.path.join(log_path, 'run_braker.log'))

    # Strip white space from the FASTA headers, then run Braker
    header_fixed_assembly = adjust_header(assembly_path)
    run_braker(
        header_fixed_assembly, bam_paths, out_path, log_path,
        opts.num_cores, opts.translation_table, fungus_flag, logger
    )


def create_dir(output_dir, log_dir):
    '''Create the output and log directories if they do not exist yet

    Missing parent directories are created as well, so nested paths such as
    'results/braker_out' work. Directories that already exist are left
    untouched.

    Args:
        output_dir: path of the output directory
        log_dir: path of the log directory
    '''
    for directory in (output_dir, log_dir):
        # exist_ok avoids the race between an existence check and creation
        os.makedirs(directory, exist_ok=True)


def adjust_header(masked_assembly):
    '''Write a copy of the assembly whose FASTA headers have no white space

    Braker complains about white spaces in sequence names, so every header
    line (a line starting with '>') is cut at its first white-space
    character, be it a space or a tab. Trailing white space is stripped
    from every line.

    Args:
        masked_assembly: path of the input FASTA file

    Returns:
        Path of the written copy, '<masked_assembly>.adjusted'
    '''
    adjusted_assembly = '{}.adjusted'.format(masked_assembly)
    # Stream line by line so a large assembly is never held in memory; the
    # context managers close both files even when writing fails
    with open(masked_assembly) as f_in, \
            open(adjusted_assembly, 'w') as f_out:
        for line in f_in:
            line = line.rstrip()
            if line.startswith('>'):
                # split() without arguments cuts at tabs as well as spaces
                line = line.split()[0]
            f_out.write('{}\n'.format(line))
    return adjusted_assembly


def import_file(input_file):
    '''Return the lines of a text file with trailing white space removed'''
    with open(input_file) as handle:
        return [row.rstrip() for row in handle]


def _unused_species_name(augustus_config_path, prefix):
    '''Return prefix, or prefix_<N>, with no Augustus species dir so far'''
    species = prefix
    i = 1
    while os.path.exists(
            os.path.join(augustus_config_path, 'species', species)):
        species = '{}_{}'.format(prefix, i)
        i += 1
    return species


def run_braker(
        adjusted_assembly, bam_files, output_dir, log_dir, num_cores,
        translation_table, fungus_flag, logger):
    '''Run Braker once per BAM file

    For each BAM file the working directory is <output_dir>/<prefix>, where
    <prefix> is the BAM file name without its extension. Braker's
    braker.gff3 is renamed to braker_<prefix>.gff3, and the protein FASTA
    written by getAnnoFastaFromJoingenes.py is renamed to
    braker_<prefix>.faa. A BAM file whose braker_<prefix>.gff3 already
    exists is skipped.

    Args:
        adjusted_assembly: genome FASTA passed to --genome
        bam_files: BAM file paths; Braker is run once for each
        output_dir: directory holding the per-BAM working directories
        log_dir: directory receiving braker_<prefix>.log
        num_cores: value for --cores
        translation_table: value for --translation_table (Braker) and
            -t (getAnnoFastaFromJoingenes.py)
        fungus_flag: '--fungus' or ''; inserted verbatim into the command
        logger: pair (logger_time, logger_txt); only debug() is called

    Raises:
        SystemExit: when Braker leaves no braker.gff3 or no protein FASTA
            can be produced
    '''
    braker_bin = D_CONF['BRAKER_PATH']
    # braker.pl --fungus --softmasking --cores=5
    # --genome=final.assembly.fasta --bam=merged.bam
    # --species=<species> --translation_table=<INT> --gff3
    logger_time, logger_txt = logger
    for bam_file in bam_files:
        prefix = os.path.basename(os.path.splitext(bam_file)[0])
        working_dir = os.path.join(output_dir, prefix)
        out_stem = os.path.join(working_dir, 'braker_{}'.format(prefix))
        gff3_braker = '{}.gff3'.format(out_stem)
        faa_braker = '{}.faa'.format(out_stem)
        log_braker = os.path.join(log_dir, 'braker_{}.log'.format(prefix))
        failure_msg = '[ERROR] Braker failed. Check the log: {}'.format(
            log_braker
        )
        logger_time.debug('START: BRAKER')

        if os.path.exists(gff3_braker):
            logger_txt.debug('[Note] Running Braker has already been finished')
            continue

        augustus_bin_path = os.path.dirname(D_CONF['AUGUSTUS_PATH'])
        augustus_config_path = os.path.join(augustus_bin_path, '../config')
        # Braker is given a species name that is not present in the
        # Augustus config yet
        species = _unused_species_name(augustus_config_path, prefix)
        bamtools_path = os.path.dirname(D_CONF['BAMTOOLS_PATH'])
        genemark_path = os.path.dirname(D_CONF['GENEMARK_PATH'])
        samtools_path = os.path.dirname(D_CONF['SAMTOOLS_PATH'])
        os.makedirs(working_dir, exist_ok=True)

        # Data paths are shell-quoted so that spaces or shell
        # metacharacters in file names cannot break (or alter) the command.
        # Tool paths taken from D_CONF are passed through unchanged.
        command1 = (
            '{} {} --softmasking --cores={} --genome={} '
            '--bam={} --species={} --gff3 --AUGUSTUS_CONFIG_PATH={} '
            '--BAMTOOLS_PATH={} --GENEMARK_PATH={} --SAMTOOLS_PATH={} '
            '--workingdir={}  --translation_table={} '
            '--AUGUSTUS_BIN_PATH={} > {} 2>&1'.format(
                braker_bin, fungus_flag, num_cores,
                shlex.quote(adjusted_assembly), shlex.quote(bam_file),
                shlex.quote(species), augustus_config_path, bamtools_path,
                genemark_path, samtools_path, shlex.quote(working_dir),
                translation_table, augustus_bin_path,
                shlex.quote(log_braker)
            )
        )
        logger_txt.debug('[Run] %s', command1)
        status = os.system(command1)
        if status != 0:
            logger_txt.debug('[Note] Braker returned status %s', status)

        # Without braker.gff3 none of the following steps can succeed, so
        # stop here instead of running them against a missing file
        raw_gff3 = os.path.join(working_dir, 'braker.gff3')
        if not os.path.exists(raw_gff3):
            sys.exit(failure_msg)

        # Change file name
        logger_txt.debug('[Run] mv %s %s', raw_gff3, gff3_braker)
        shutil.move(raw_gff3, gff3_braker)

        # Extract the protein sequences of the predicted genes
        get_anno_script = os.path.join(
            os.path.dirname(braker_bin), 'getAnnoFastaFromJoingenes.py'
        )
        command3 = '{} -g {} -o {} -t {} -3 {}'.format(
            get_anno_script, shlex.quote(adjusted_assembly),
            shlex.quote(out_stem), translation_table,
            shlex.quote(gff3_braker)
        )
        logger_txt.debug('[Run] %s', command3)
        os.system(command3)

        # The script writes <out_stem>.aa; rename it to .faa
        aa_braker = '{}.aa'.format(out_stem)
        logger_txt.debug('[Run] mv %s %s', aa_braker, faa_braker)
        if os.path.exists(aa_braker):
            shutil.move(aa_braker, faa_braker)

        if not os.path.exists(faa_braker):
            sys.exit(failure_msg)
    logger_time.debug('DONE : Braker')


# Run the command-line entry point only when executed as a script
if __name__ == '__main__':
    main()
